******************************************
* Title: rwanda_jde_tableA14.do
* Author: Todd Pugatch
* Last update: June 10 2024
* Description: analysis for Blimpo and Pugatch, "Entrepreneurship Education
*	and Teacher Training in Rwanda," Stage 2 Registered report, Journal of 
*	Development Economics
* Inputs: 	student_merge_jde.dta
*			rwanda_school_trainattend_jde.dta
*			teacher_merge_jde.dta
* Outputs: 	rwanda_jde_tableA14.[txt/out]
* Notes: produces Table A14
*******************************************

local start=`"$S_TIME"'
clear
clear matrix
clear mata
graph drop _all
program drop _all
cap log close
set more off

* Set directories 
*global main "[SET MAIN DIRECTORY HERE]"
	global rawdata "$main/01_data/01_raw"
	global cleandata "$main/01_data/02_clean"
	global dofiles "$main/02_dofiles"
	global results "$main/03_results"
	global temp "$main/04_output"

* begin log file
log using "$temp/rwanda_jde_tableA14.txt", text replace

*******************************************************
* SET UP MULTIPLE HYPOTHESIS CORRECTION
/*Proceed in 3 steps:
	1. Run all regressions for H1. Save coefficients & std. errors.
	2. Calculate q-values for all hypotheses.
	3. Re-run regressions to get output, including q-values.*/
*******************************************************

* 1. GET COEF/SE/P FOR ALL REGRESSIONS	
* PREPARE DATA
qui use "$cleandata/student_merge_jde.dta", clear

* winsorize all financial variables used in analysis
foreach w in el bl {
	winsor earn_last2mths_usd_`w', gen(earn_last2mths_usdw_`w') p(.01) highonly
	lab var earn_last2mths_usdw_`w' "earn_last2mths_usd_`w', winsorized at 99th percentile"
}

* prep missing values for students without baseline outcome
local Y0 "ownbusiness_bl earn_money_bl earn_last2mths_usdw_bl"
foreach y0 in `Y0' {
	qui gen `y0'i=`y0'
	qui replace `y0'i=r(mean) if `y0'==.	
	qui su `y0' if treatment==0
	qui gen `y0'm=(`y0'==.)
	lab var `y0'i "`y0', imputing control mean for missing values"
	lab var `y0'm "missing value for `y0'"
}

* generate interaction terms between treatment and potential mechanisms
/*merge with administrative data on teacher take-up*/
local X "training_any_pct exchange_pct"
merge m:1 school_code using "$cleandata/rwanda_school_trainattend_jde.dta", keepusing(`X')
drop _merge

/*teacher pedagogy:
	Merge student to teacher pedagogy by school, in 2 ways:
		1. only if linked by teacher name across student/teacher endline
		2. school-level mean
		Trade-off is extra precision but loss of observations for measure #1.*/

* measure #1: linked by individual teacher ID
local X "mainact_teacher_active_pct	method_curric_pct_el"
ren teacherid_link teacherid
merge m:1 school_code teacherid using "$cleandata/teacher_merge_jde.dta", keepusing(`X')
qui drop if _merge==2
drop _merge
ren mainact_teacher_active_pct activeinstruct_linked
ren method_curric_pct_el methodcurric_linked		
qui save "$temp/mechanismtemp.dta", replace
	
* measure #2: school-level mean
qui use "$cleandata/teacher_merge_jde.dta", clear
qui collapse (mean) activeinstruct_schlavg=mainact_teacher_active_pct methodcurric_schlavg=method_curric_pct_el, by(school_code)

* merge with student data
merge 1:m school_code using "$temp/mechanismtemp.dta"
drop _merge

/*interact potential mechanisms w/ treatment.*/
local X "training_any_pct exchange_pct activeinstruct_linked activeinstruct_schlavg methodcurric_linked methodcurric_schlavg active_instruct_narrow_el active_instruct_broad_el"
foreach x in `X' {
	qui gen D_`x'=treatment*`x'
	lab var D_`x' "treatment * `x'"
}
qui save "$temp/mechanismtemp.dta", replace

* ANALYSIS
/*Goal: produce Table P8 (Panels A-C, columns 1-6 in each panel)
	Uses endline student survey.
	Regress outcome on treatment status and treatment interacted with potential mechanism. 
	Control for baseline outcome, main effect of potential mechanism, and randomization strata. 
	Cluster s.e.'s by school.
	Test sum of treatment and interaction term.
	In this Step 1, get p-values from treatment effect and interaction term.*/
local X "training_any_pct exchange_pct activeinstruct_linked activeinstruct_schlavg methodcurric_linked methodcurric_schlavg active_instruct_narrow_el"

/*Panel A: outcome = business involvement*/	
local y0 "ownbusiness_bli ownbusiness_blm"
local i=1 /*initialize loop*/
foreach x in `X' {	
	qui areg personal_business_el treatment D_`x' `x' `y0', a(strata) cluster(school_code)
	/*get p-value of treatment effect*/
	scalar define T8_panA_c`i'bt=_b[treatment]
	scalar define T8_panA_c`i'set=_se[treatment]
	scalar define t=_b[treatment]/_se[treatment]
	scalar define T8_panA_c`i'pt=2*ttail(e(df_r),abs(t))
	/*get p-value of interaction term*/
	scalar define T8_panA_c`i'bi=_b[D_`x']
	scalar define T8_panA_c`i'sei=_se[D_`x']
	scalar define t=_b[D_`x']/_se[D_`x']
	scalar define T8_panA_c`i'pi=2*ttail(e(df_r),abs(t))
	local i=`i'+1
}

/*Panel B: outcome = employment*/	
local y0 "earn_money_bli earn_money_blm"
local i=1 /*initialize loop*/
foreach x in `X' {
	qui areg employed_el treatment D_`x' `x' `y0', a(strata) cluster(school_code)
	/*get p-value of treatment effect*/
	scalar define T8_panB_c`i'bt=_b[treatment]
	scalar define T8_panB_c`i'set=_se[treatment]
	scalar define t=_b[treatment]/_se[treatment]
	scalar define T8_panB_c`i'pt=2*ttail(e(df_r),abs(t))
	/*get p-value of interaction term*/
	scalar define T8_panB_c`i'bi=_b[D_`x']
	scalar define T8_panB_c`i'sei=_se[D_`x']
	scalar define t=_b[D_`x']/_se[D_`x']
	scalar define T8_panB_c`i'pi=2*ttail(e(df_r),abs(t))
	local i=`i'+1
}

/*Panel C: outcome = income*/	
local y0 "earn_last2mths_usdw_bli earn_last2mths_usdw_blm"
local i=1 /*initialize loop*/
foreach x in `X' {
	qui areg earn_last2mths_usdw_el treatment D_`x' `x' `y0', a(strata) cluster(school_code)
	/*get p-value of treatment effect*/
	scalar define T8_panC_c`i'bt=_b[treatment]
	scalar define T8_panC_c`i'set=_se[treatment]
	scalar define t=_b[treatment]/_se[treatment]
	scalar define T8_panC_c`i'pt=2*ttail(e(df_r),abs(t))
	/*get p-value of interaction term*/
	scalar define T8_panC_c`i'bi=_b[D_`x']
	scalar define T8_panC_c`i'sei=_se[D_`x']
	scalar define t=_b[D_`x']/_se[D_`x']
	scalar define T8_panC_c`i'pi=2*ttail(e(df_r),abs(t))
	local i=`i'+1
}	

* 2. CALCULATE Q-VALUES FOR ALL HYPOTHESES
/*Calculate sharpened q-values from Benjamini, Krieger, Yuketeli (2006).
	Use code from fdr_sharpened_qvalues.do from Anderson (2008), available at 
		https://are.berkeley.edu/~mlanderson/ARE_Website/Research.html*/
* prep data: put p-values from above into a dataset
clear
local I=`i'-1 	/*#cols in table*/
local J=3 		/*#panels in table*/
local n=`I'*`J'*2	/*# of hypotheses tested (x2 because treatment effect & interaction for each outcome*/
qui set obs `n'
qui gen table=8
qui egen panel=seq(), from(1) to(`J')
qui bysort panel: egen column=seq(), from(1) to(`I')
qui bysort panel column: egen interaction=seq(), from(0) to(1)
foreach x in b se pval {
	qui gen double `x'=.
}
forval j=1/`J' { /*loop through panels*/
	if (`j'==1) local jj "A"
	if (`j'==2) local jj "B"
	if (`j'==3) local jj "C"
	forval i=1/`I' {
		qui replace b=T8_pan`jj'_c`i'bt if panel==`j' & column==`i' & interaction==0
		qui replace se=T8_pan`jj'_c`i'set if panel==`j' & column==`i' & interaction==0
		qui replace pval=T8_pan`jj'_c`i'pt if panel==`j' & column==`i' & interaction==0 
		qui replace b=T8_pan`jj'_c`i'bi if panel==`j' & column==`i' & interaction==1
		qui replace se=T8_pan`jj'_c`i'sei if panel==`j' & column==`i' & interaction==1
		qui replace pval=T8_pan`jj'_c`i'pi if panel==`j' & column==`i' & interaction==1 
	}
}
sort table panel column interaction

* START OF ANDERSON CODE FROM fdr_sharpened_qvalues.do (modified)
* Collect the total number of p-values tested
quietly sum pval
local totalpvals = r(N)

* Sort the p-values in ascending order and generate a variable that codes each p-value's rank

quietly gen int original_sorting_order = _n
quietly sort pval
quietly gen int rank = _n if pval~=.

* Generate the variable that will contain the BKY (2006) sharpened q-values

qui gen bky06_qval = 1 if pval~=.
	
* Set the initial counter to 1 

local qval = 1

/* Set up a loop that begins by checking which hypotheses are rejected at q = 1.000, 
	then checks which hypotheses are rejected at q = 0.999, 
	then checks which hypotheses are rejected at q = 0.998, etc.  
	The loop ends by checking which hypotheses are rejected at q = 0.001.  */
while `qval' > 0 {
	* First Stage
	* Generate the adjusted first stage q level we are testing: q' = q/1+q
	local qval_adj = `qval'/(1+`qval')
	
	* Generate value q'*r/M
	qui gen fdr_temp1 = `qval_adj'*rank/`totalpvals'
	
	* Generate binary variable checking condition p(r) <= q'*r/M
	qui gen reject_temp1 = (fdr_temp1>=pval) if pval~=.
	
	* Generate variable containing p-value ranks for all p-values that meet above condition
	qui gen reject_rank1 = reject_temp1*rank
	
	* Record the rank of the largest p-value that meets above condition
	qui egen total_rejected1 = max(reject_rank1)

	* Second Stage
	* Generate the second stage q level that accounts for hypotheses rejected in first stage: q_2st = q'*(M/m0)
	local qval_2st = `qval_adj'*(`totalpvals'/(`totalpvals'-total_rejected1[1]))
	
	* Generate value q_2st*r/M
	qui gen fdr_temp2 = `qval_2st'*rank/`totalpvals'
	
	* Generate binary variable checking condition p(r) <= q_2st*r/M
	qui gen reject_temp2 = (fdr_temp2>=pval) if pval~=.
	
	* Generate variable containing p-value ranks for all p-values that meet above condition
	qui gen reject_rank2 = reject_temp2*rank
	
	* Record the rank of the largest p-value that meets above condition
	qui egen total_rejected2 = max(reject_rank2)

	* A p-value has been rejected at level q if its rank is less than or equal to the rank of the max p-value that meets the above condition
	qui replace bky06_qval = `qval' if rank <= total_rejected2 & rank~=.
	
	* Reduce q by 0.001 and repeat loop
	drop fdr_temp* reject_temp* reject_rank* total_rejected*
	local qval = `qval' - .001
}

quietly sort original_sorting_order

**********************************************************************
* END OF ANDERSON CODE FROM fdr_sharpened_qvalues.do (modified)
**********************************************************************
* list results
list table panel column interaction b se pval bky06_qval

* prep results for inclusion in final regression output
sort table panel column interaction
local k=1
forval j=1/`J' { /*loop through panels*/
	if (`j'==1) local jj "A"
	if (`j'==2) local jj "B"
	if (`j'==3) local jj "C"
	forval i=1/`I' {
		scalar define T8_pan`jj'_c`i'qt=bky06_qval in `k'
		local k=`k'+1
		scalar define T8_pan`jj'_c`i'qi=bky06_qval in `k'
		local k=`k'+1
	}
}

* 3. RE-RUN REGRESSIONS TO GET OUTPUT, INCLUDING Q-VALUES
qui use "$temp/mechanismtemp.dta", clear

* ANALYSIS
/*Goal: produce Table P8 (Panels A-C, columns 1-6 in each panel)
	Uses endline student survey.
	Regress outcome on treatment status and treatment interacted with potential mechanism. 
	Control for baseline outcome, main effect of potential mechanism, and randomization strata. 
	Cluster s.e.'s by school.
	Test sum of treatment and interaction term.*/
local stat ""control mean",mu_c,"treatment + interaction",coef_sum,"se(treatment + interaction)",se_sum,"interaction mean",mu_x,"q-value (treatment)",qvt,"q-value (interaction)",qvi"
local X "exchange_pct activeinstruct_linked activeinstruct_schlavg methodcurric_linked methodcurric_schlavg active_instruct_narrow_el"

/*Panel A: outcome = business involvement*/	
local y0 "ownbusiness_bli ownbusiness_blm"
local i=1

* training_any_pct
qui areg personal_business_el treatment D_training_any_pct training_any_pct `y0', a(strata) cluster(school_code)
qui lincom treatment+D_training_any_pct
scalar define coef_sum=r(estimate)
scalar define se_sum=r(se)
qui su training_any_pct if e(sample)
scalar define mu_x=r(mean)
qui su personal_business_el if treatment==0 & e(sample)
scalar define mu_c=r(mean)
scalar define qvt=T8_panA_c`i'qt
scalar define qvi=T8_panA_c`i'qi	
outreg2 treatment D_training_any_pct using "$results/rwanda_jde_tableA14.xls", se excel nolabel nocons addstat(`stat') replace
local i=`i'+1

foreach x in `X' {	
	qui areg personal_business_el treatment D_`x' `x' `y0', a(strata) cluster(school_code)
	qui lincom treatment+D_`x'
	scalar define coef_sum=r(estimate)
	scalar define se_sum=r(se)
	qui su `x' if e(sample)
	scalar define mu_x=r(mean)
	qui su personal_business_el if treatment==0 & e(sample)
	scalar define mu_c=r(mean)
	scalar define qvt=T8_panA_c`i'qt
	scalar define qvi=T8_panA_c`i'qi	
	outreg2 treatment D_`x' using "$results/rwanda_jde_tableA14.xls", se excel nolabel nocons addstat(`stat') append
	local i=`i'+1
}

/*Panel B: outcome = employment*/	
local y0 "earn_money_bli earn_money_blm"
local i=1
local X "training_any_pct exchange_pct activeinstruct_linked activeinstruct_schlavg methodcurric_linked methodcurric_schlavg active_instruct_narrow_el"
foreach x in `X' {
	qui areg employed_el treatment D_`x' `x' `y0', a(strata) cluster(school_code)
	qui lincom treatment+D_`x'
	scalar define coef_sum=r(estimate)
	scalar define se_sum=r(se)
	qui su `x' if e(sample)
	scalar define mu_x=r(mean)
	qui su employed_el if treatment==0 & e(sample)
	scalar define mu_c=r(mean)
	scalar define qvt=T8_panB_c`i'qt
	scalar define qvi=T8_panB_c`i'qi	
	outreg2 treatment D_`x' using "$results/rwanda_jde_tableA14.xls", se excel nolabel nocons addstat(`stat') append
	local i=`i'+1
}

/*Panel C: outcome = income*/	
local y0 "earn_last2mths_usdw_bli earn_last2mths_usdw_blm"
local i=1
foreach x in `X' {
	qui areg earn_last2mths_usdw_el treatment D_`x' `x' `y0', a(strata) cluster(school_code)
	qui lincom treatment+D_`x'
	scalar define coef_sum=r(estimate)
	scalar define se_sum=r(se)
	qui su `x' if e(sample)
	scalar define mu_x=r(mean)
	qui su earn_last2mths_usdw_el if treatment==0 & e(sample)
	scalar define mu_c=r(mean)
	scalar define qvt=T8_panC_c`i'qt
	scalar define qvi=T8_panC_c`i'qi	
	outreg2 treatment D_`x' using "$results/rwanda_jde_tableA14.xls", se excel nolabel nocons addstat(`stat') append
	local i=`i'+1
}	


erase "$temp/mechanismtemp.dta"
local end=`"$S_TIME"' 
di "`start'"
di "`end'"
log close
